**************************************************
**************************************************
**************************************************
**************************************************
**************************************************
**************************************************
**************************************************

**** This do-file cleans and prepares the dataset used by the do-file 02_analysisB.do

* --- Working directory ---------------------------------------------------
* Replace the placeholder below with the folder that holds the data files.
* The command is wrapped in capture, so a failing cd does not stop the script.
capture cd "INCLUDE YOUR PAT HERE"


* --- Graph scheme (same look for every figure) ---------------------------
set scheme s1mono

* --- Load the first survey file and stack the second one below it --------
use "Microdades anonimitzades -863.dta", clear
append using "CEO857_2017 no anon.dta"

* Wave indicator from BOP_NUM: 41 becomes 0, 42 becomes 1
recode BOP_NUM                  ///
    (41 = 0 "Previous 1-O")     ///
    (42 = 1 "After 1-O")        ///
    , generate(post1o)

* --- Municipality-level merges, all many-to-one on MUN -------------------
* Police interventions (lat/long information)
merge m:1 MUN using "police_interventions", nogenerate

* 2017 sociodemographic data: only observations found in both files are kept
merge m:1 MUN using "dades_2017", nogenerate keep(matched)

* 1 October referendum results
merge m:1 MUN using "data_1o", nogenerate

* Remove every row whose PONDERA is system missing (for instance rows that
* came only from a using file and therefore carry no survey data)
drop if PONDERA == .

******Recodings***************
******************************

* --- Voting behaviour in the 1 October referendum ------------------------
* vote1o: P82A codes 1-3 become 0, code 4 becomes 1, codes 98-99 become missing
recode P82A                     ///
    (1/3   = 0 "Did not vote")  ///
    (4     = 1 "Voted")         ///
    (98/99 = .)                 ///
    , generate(vote1o)
label variable vote1o "Vote 1 October"

* decisionvote: P83 code 4 becomes 1, codes 1-3 become 0, anything else
* (including missing) becomes missing.
* NOTE(review): the value label text "Octobert" looks like a typo for
* "October"; it is left unchanged here so labels in existing output match.
recode P83                              ///
    (4    = 1 "On the 1 Octobert")      ///
    (1/3  = 0 "Had already decided")    ///
    (else = .)                          ///
    , generate(decisionvote)
* The timing of the decision is blanked out for respondents coded as non-voters
replace decisionvote = . if vote1o == 0
label variable decisionvote "When did she decide to vote"

***Independent variables

* --- Municipalities where the police intervened --------------------------
rename municipi_atacat raided_mun
label variable raided_mun "Raided polling station"

* --- Subjective national identity ----------------------------------------
* ins: four ordered categories built from C700; codes 98-99 become missing
recode C700                                             ///
    (1/2   = 1 "Only and more Spanish than Catalan")    ///
    (3     = 2 "Equally Catalan and Spanish")           ///
    (4     = 3 "More Catalan than Spanish")             ///
    (5     = 4 "Only Catalan")                          ///
    (98/99 = .)                                         ///
    , generate(ins)
* ins3_noesp: same scale with category 1 set to missing and the remaining
* three categories renumbered 1-3
recode ins                      ///
    (1 = .)                     ///
    (2 = 1 "Dual")              ///
    (3 = 2 "More Catalan")      ///
    (4 = 3 "Only Catalan")      ///
    , generate(ins3_noesp)

* --- Language ------------------------------------------------------------
* NOTE(review): the else rule also catches missing values, so respondents
* with no valid C705 answer end up in category 0 - confirm this is intended.
recode C705                                         ///
    (1    = 1 "Catalan")                            ///
    (else = 0 "Other languages and combinations")   ///
    , generate(language)
label variable language "Main language: Catalan"

* --- Left-right scale ----------------------------------------------------
* ideol: P25 with codes 98-99 set to missing
recode P25 (98/99 = .), generate(ideol)
* ideol3: three groups (0-4, 5, 6-10). The variable label of ideol is set
* only afterwards, which keeps the automatic label of ideol3 as it was.
recode ideol                ///
    (0/4  = 1 "Left")       ///
    (5    = 2 "Centre")     ///
    (6/10 = 3 "Right")      ///
    , generate(ideol3)
label variable ideol "Ideology"

* --- Party type by position on the referendum ----------------------------
* NOTE(review): the else rule also catches missing values, so they are
* coded 0 together with abstention and other parties - confirm.
recode P38_CENSR                            ///
    (1/6   = 1 "Against referendum")        ///
    (10/15 = 2 "In favour referendum")      ///
    (16    = 3 "Ambiguous")                 ///
    (else  = 0 "Abstention and others")     ///
    , generate(party_type)
label variable party_type "Type of party"

* --- Age groups (GR_EDAT codes 1 and 2 are merged) -----------------------
recode GR_EDAT                      ///
    (1/2 = 1 "De 18 a 34 anys")     ///
    (3   = 2 "De 35 a 49 anys")     ///
    (4   = 3 "De 50 a 64 anys")     ///
    (5   = 4 "Més de 64 anys")      ///
    , generate(age)
label variable age "Age (groups)"

* --- Gender --------------------------------------------------------------
recode SEXE             ///
    (1 = 0 "Man")       ///
    (2 = 1 "Woman")     ///
    , generate(female)
label variable female "Gender (Female)"

* --- Size of habitat (two groups from the HABITAT codes) -----------------
recode HABITAT                                  ///
    (1/2 = 0 "Less than 10.000 inhabitants")    ///
    (3/6 = 1 "More than 10.000 inhabitants")    ///
    , generate(habitat2)
label variable habitat2 "Size of municipality"

* --- Place of birth ------------------------------------------------------
* NOTE(review): as with language, the else rule sends missing values to
* category 0 - confirm this is intended.
recode C100                         ///
    (1    = 1 "Catalonia")          ///
    (else = 0 "Outside Catalonia")  ///
    , generate(origin)
label variable origin "Place of birth"

* --- Education (existing recoded variable, renamed only) -----------------
rename C500_REC education
label variable education "Education"

* --- Population ----------------------------------------------------------
* Natural log of total_17
gen log_population17 = log(total_17)
label variable log_population17 "(log) Population 2017"

* Barcelona dummy: 1 when total_17 equals 1620809, 0 for every other
* non-missing value, missing when total_17 is missing.
* The earlier recode (0/1000000=0) (1620809=1) left any value outside those
* two rules unchanged, so the dummy could silently contain raw population
* figures; the explicit comparison guarantees a 0/1 variable.
gen byte barcelona = (total_17 == 1620809) if !missing(total_17)
label variable barcelona "Barcelona dummy"

* --- Policemen per 10,000 inhabitants ------------------------------------
* Start from number_policemen_close, forced to 0 where raided_mun equals 0
* (a missing raided_mun does not satisfy the test, so the original count is kept)
gen number_policemen_polling = cond(raided_mun == 0, 0, number_policemen_close)
* Express the count relative to total_17, per 10,000 inhabitants
replace number_policemen_polling = number_policemen_polling * 10000 / total_17
label variable number_policemen_polling "N. of policemen/10,000 inhabitants"

* --- Distance to the nearest raided polling station ----------------------
* Rescale by 1/1000 (presumably metres to kilometres - TODO confirm units)
replace distance = distance/1000
* Raided municipalities are given distance 0
replace distance = 0 if raided_mun==1
* log(1 + distance) so that distance 0 maps to exactly 0
gen log_distance = log(1+distance)
label variable log_distance "(Log) Distance to nearest raid polling station"

gen log_distance_sq = log_distance^2
label variable log_distance_sq "(Log) Distance squared to nearest raid polling station^2"

* --- Log distance in three categories ------------------------------------
*   0 = log distance exactly 0 (police intervened in the municipality)
*   1 = positive log distance up to and including the median
*   2 = log distance above the median
* The median is taken over observations with a positive log distance; the
* value noted in the original script was 2.424135.
summarize log_distance if log_distance > 0, detail
scalar median_log_distance = r(p50)
* Stop here if the median could not be computed (no positive distances)
assert !missing(scalar(median_log_distance))

* The earlier recode used the ranges 0, 0.01/2.424135 and 2.424136/4.5.
* recode leaves values outside its rules unchanged, so log distances in
* (0, 0.01), in (2.424135, 2.424136) or above 4.5 stayed as raw numbers.
* Because 2.424135 is a rounded median, observations sitting exactly on the
* median could fall into the middle gap. Inequalities against the stored
* median cover the whole positive range without gaps.
gen byte distance_3cat = cond(log_distance == 0, 0, cond(log_distance <= scalar(median_log_distance), 1, 2)) if !missing(log_distance)
label define distance_3cat 0 "Attacked" 1 "Unattacked (attacked close)" 2 "Unattacked (attacked far)"
label values distance_3cat distance_3cat
label variable distance_3cat "Distance to nearest raided polling station (3 categories)"

*****Define control variables********
*************************************

* Control set without the national-identity factor variable (i.ins)
global controls_noins "i.party_type origin log_population17 i.barcelona i.age i.female i.education"
* Full control set: same list with i.ins placed after origin
global controls "i.party_type origin i.ins log_population17 i.barcelona i.age i.female i.education"
